/*===================================================================
datamaker.do
	*Started:	
	*Updated:	2021-08-13
	*Author(s):	David Phillips, Charlie Law
	*Purpose:	Combines underlying baseline and outcome data into
				a final analysis file.
===================================================================*/



***Main data

	*baseline assessment: load the assessment file, discarding whatever is
	*currently in memory (clear is the documented option of -use-)
		use "$datadir/assmt.dta", clear

	*merge on enrollments (HMISID must be unique in both files; unmatched
	*rows from either side are kept)
		merge 1:1 HMISID using "$datadir/enrollments.dta", nogenerate
		
	*merge on infutor
		merge 1:1 HMISID using "$datadir/infutor.dta", nogenerate
		*blank out every new_add* variable on rows with infmatch == 0
		*(presumably rows with no Infutor match -- confirm against infutor.dta)
		foreach var of varlist new_add* {
			replace `var' = . if infmatch == 0
		}
		
	*lagged values of HMIS
		merge 1:1 HMISID using "$datadir/lagged.dta", nogenerate

	*merge on payment amounts
		merge 1:1 HMISID using "$datadir/paymt.dta", nogenerate

	*analysis ready file: checkpoint of the merged data before any cleaning
	*(the same path is written again at the end of this do-file)
		save "$datadir/regdata.dta", replace
	
	
	*some cleaning and variable definitions
		*value label for the treatment indicator
		label define treatment 0 "Control" 1 "Treatment"
		label values treatment_sum treatment

		*variable labels for the monthly address outcomes, months 1 through 24
		foreach m of numlist 1/24 {
			label var new_add_`m' "New Address, `m' months after program start"
			label var any_add_`m' "Any Address, `m' months after program start"
		}
		label var new_addpre_12 "New Address, past 1 year"


		*calendar year and month of the start date
		generate year  = year(startdate)
		generate month = month(startdate)

		*numeric id that is distinct for every (year, month) pair
		*NOTE(review): the 2017 multiplier looks unusual for a month index
		*(a months-since-2017 counter would be (year - 2017) * 12 + month - 1);
		*kept exactly as written -- confirm intent before changing
		generate monthyear = 2017 * year + (month - 1)

		*one stratum per month x year x agency combination; rows where group()
		*returns missing are pooled into code 999
		egen strata = group(month year agency)
		replace strata = 999 if missing(strata)
		
		*first assessment score measured relative to 13
		generate difscore = first_assmt_score - 13

		*interactions of the re-centered score with each indicator
		generate difscoreXtreat    = treatment_sum * difscore
		generate difscoreXeligible = eligible_hp   * difscore
		generate difscoreXhp       = hp_3mo        * difscore
	
		*Sample indicators (1/0), all relative to the 01mar2020 cutoff.
		*!missing() is used instead of "~= ." so that extended missing values
		*(.a-.z) in rct_dt are not treated as valid dates; they would otherwise
		*also satisfy ">= td(01mar2020)" and be flagged as main/late.
		*main      : has an rct_dt and eligible_other == 0
		*early     : main, rct_dt before 01mar2020
		*plac      : has an rct_dt, eligible_other == 1, rct_dt before 01mar2020
		*late      : main, rct_dt on or after 01mar2020
		*veryearly : main, rct_dt at least 180 days before 01mar2020
		gen main      = (!missing(rct_dt) & eligible_other == 0)
		gen early     = (main == 1 & rct_dt <  td(01mar2020))
		gen plac      = (!missing(rct_dt) & eligible_other == 1 & rct_dt < td(01mar2020))
		gen late      = (main == 1 & rct_dt >= td(01mar2020))
		gen veryearly = (main == 1 & rct_dt <= td(01mar2020) - 180)

		*mutually exclusive race/ethnicity indicators: Hispanic takes priority,
		*white and black are defined only among non-Hispanic records
		generate hispanic = (ethni == "Hispanic/Latino")
		generate white    = (hispanic == 0 & race == "White")

		*the incoming black variable is discarded and rebuilt on the same basis
		drop black
		generate black    = (hispanic == 0 & race == "Black or African American")
	
	
	*calculating predicted risk
		*temporarily code new_addpre_12 as 0 where infmatch == 0 so those rows
		*are not dropped from the regression; infmatch itself is a regressor
		replace new_addpre_12 = 0 if infmatch == 0

		*fit on start dates in [01jul2017, 01jul2019) with assmtscorez below 12
		regress non_hp_6m $spdatlist $lagoutcomelist infmatch ///
			if startdate >= td(01jul2017) & startdate < td(01jul2019) & assmtscorez < 12

		*linear prediction, computed for every row with non-missing regressors
		predict yhat, xb
		label variable yhat "Predicted risk of homelessness"

		*undo the temporary recode
		replace new_addpre_12 = . if infmatch == 0
		
		
	*labeling
		*race / ethnicity indicators
		label variable hispanic "Hispanic"
		label variable white "Non-Hispanic White"
		label variable black "Non-Hispanic Black"

		*assessment items
		label variable assmtscorez "Assessment score"
		label variable agez "Age"
		label variable malez "Male"
		label variable disabledYz "Disabled"
		label variable unfithousYz "Homeless, past 3 years"
		label variable overcrowdYz "Housing overcrowded"
		label variable owemoneyYz "Owe money"
		label variable poorcreditYz "Poor credit"
		label variable engtroubleYz "Trouble with English"
		label variable chronicheaYz "Chronic health condition"
		label variable mental_heaYz "Mental health condition"
		label variable prisonNz "Incarcerated, 6 mos"
		label variable violenceYz "Violence threat, 6 mos"
		label variable currentbalancz "Assets on hand"
		label variable famtimeYm "No children"

		*prior engagement measures
		label variable countofpriorengagement "HMIS count past year"
		label variable priorengagementbinary "Any HMIS, 12 mos"
		label variable countofpriorengagement_hp "HP Count past year"
		label variable priorengagementbinary_hp "Any prevention services, 12 mos"
		label variable countofpriorengagement_nothp "Non-HP count past year"
		label variable priorengagementbinary_nothp "Any homeless services, 12 mos"

		*this replaces the label given to new_addpre_12 earlier in the file
		label variable new_addpre_12 "Address change, 12 mos"

		*remaining assessment items
		label variable legalYz "Legal problems"
		label variable healthERNz "Num ER, 6 mos"
		label variable famchangeYz "Household changed, 6 mos"
	
	*save the analysis dataset (overwrites the file at this path)
		save "$datadir/regdata.dta", replace
		
		


